In [1]:
import graphlab
In [2]:
# Build the `sales` SFrame from the 'home_data.gl' path (relative to the
# notebook's working directory); every later cell reads from this table.
sales = graphlab.SFrame('home_data.gl')
In [1]:
# Bare expression: the notebook renders the SFrame as this cell's output.
sales
In [8]:
# Open the "Scatter Plot" view of the table with sqft_living on the x axis
# and price on the y axis.
sales.show(view="Scatter Plot", x="sqft_living", y="price")
In [5]:
# Randomly partition the rows into two SFrames using a 0.8 split fraction
# (roughly 80% train / 20% test). The fixed seed makes the partition
# repeatable from run to run.
train_data, test_data = sales.random_split(0.8, seed=0)
In [6]:
import graphlab
In [7]:
# Rebinds `sales` by loading 'home_data.gl' again -- the same call as the
# earlier load cell, so the table contents are unchanged.
sales = graphlab.SFrame('home_data.gl')
In [8]:
# Bare expression: the notebook renders the (re)loaded SFrame as cell output.
sales
Out[8]:
In [9]:
# Recreate the train/test partition from the reloaded table. The split
# fraction (0.8) and seed (0) match the earlier split cell, so the same
# partition is produced.
train_data, test_data = sales.random_split(0.8, seed=0)
In [10]:
# Fit a linear regression on the training rows that predicts 'price' from
# the single feature 'sqft_living'.
sqft_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=['sqft_living']
)
In [12]:
# Mean of the 'price' column over the held-out test rows.
# A parenthesized single-argument print behaves the same as the Python 2
# print statement and also parses as a Python 3 print() call.
print(test_data['price'].mean())
In [14]:
# Show whatever metrics evaluate() returns for the single-feature model on
# the held-out test rows.
# Parenthesized single-argument print is valid on both Python 2 and 3.
print(sqft_model.evaluate(test_data))
In [15]:
import matplotlib.pyplot as plt
%matplotlib inline
In [16]:
# Draw the held-out prices against living area as points ('.') and overlay
# the single-feature model's predictions for the same rows as a line ('-').
plt.plot(test_data['sqft_living'], test_data['price'], '.',
         test_data['sqft_living'], sqft_model.predict(test_data), '-')
# Label both axes with the plotted column names so the figure can be read
# without looking at the code.
plt.xlabel('sqft_living')
# The trailing semicolon keeps the matplotlib object repr out of the cell output.
plt.ylabel('price');
Out[16]:
In [17]:
# Fetch the model's 'coefficients' field; as the cell's last expression it is
# rendered as the cell output.
sqft_model.get('coefficients')
Out[17]:
In [18]:
# Column names used as inputs for the multi-feature price model.
features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
]
In [21]:
# Restrict the table to the columns listed in `features` and open the
# default show() view for that subset.
sales[features].show()
In [22]:
# Open the 'BoxWhisker Plot' view of price (y) grouped by zipcode (x).
sales.show(view='BoxWhisker Plot', x='zipcode', y='price')
In [23]:
# Fit a linear regression on the training rows that predicts 'price' from
# every column named in `features`.
my_features_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=features
)
In [27]:
# Echo the feature list handed to the multi-feature model.
# Parenthesized single-argument print is valid on both Python 2 and 3.
print(features)
In [29]:
# Evaluate both models on the same held-out rows so their metrics can be
# compared directly: the single-feature model first, then the multi-feature one.
# Parenthesized single-argument print is valid on both Python 2 and 3.
print(sqft_model.evaluate(test_data))
print(my_features_model.evaluate(test_data))
In [30]:
# Select the row(s) whose 'id' equals the string '5309101200'.
house1 = sales[sales['id'] == '5309101200']
In [31]:
# Bare expression: render the selected row(s) as the cell output.
house1
Out[31]:
In [32]:
# Recorded 'price' value(s) for the house1 selection.
# Parenthesized single-argument print is valid on both Python 2 and 3.
print(house1['price'])
In [34]:
# Single-feature model's prediction for the house1 selection.
# Parenthesized single-argument print is valid on both Python 2 and 3.
print(sqft_model.predict(house1))
In [35]:
# Multi-feature model's prediction for the house1 selection.
# Parenthesized single-argument print is valid on both Python 2 and 3.
print(my_features_model.predict(house1))
In [36]:
# Select the row(s) whose 'id' equals the string '1925069082'.
house2 = sales[sales['id'] == '1925069082']
In [37]:
# Text rendering of the house2 selection.
# Parenthesized single-argument print is valid on both Python 2 and 3.
print(house2)
In [38]:
# Recorded 'price' value(s) for the house2 selection.
# Parenthesized single-argument print is valid on both Python 2 and 3.
print(house2['price'])
In [39]:
# Single-feature model's prediction for the house2 selection.
# Parenthesized single-argument print is valid on both Python 2 and 3.
print(sqft_model.predict(house2))
In [40]:
# Multi-feature model's prediction for the house2 selection.
# Parenthesized single-argument print is valid on both Python 2 and 3.
print(my_features_model.predict(house2))
In [46]:
# Keep only the rows whose 'zipcode' equals the string '98039'.
expensiveHouses = sales[sales['zipcode'] == '98039']
In [47]:
# Text rendering of the rows selected for zipcode '98039'.
# Parenthesized single-argument print is valid on both Python 2 and 3.
print(expensiveHouses)
In [48]:
# Mean 'price' over the rows selected for zipcode '98039'.
# Parenthesized single-argument print is valid on both Python 2 and 3.
print(expensiveHouses['price'].mean())
In [63]:
# Keep the rows whose 'sqft_living' lies in [2000, 4000]; both bounds are
# inclusive because the comparisons are >= and <=.
fraction_finder = sales[(sales['sqft_living'] >= 2000) & (sales['sqft_living'] <= 4000)]
# Report the share of all rows that fall inside that range. float() forces
# true division so the ratio is not truncated to 0 by Python 2 integer
# division.
print(float(fraction_finder.num_rows()) / sales.num_rows())
# Open the default show() view for the filtered rows.
fraction_finder.show()
In [55]:
# Input columns for the larger regression model: the six basic attributes
# first, followed by the additional descriptive columns.
advanced_features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
    'condition',      # condition of house
    'grade',          # measure of quality of construction
    'waterfront',     # waterfront property
    'view',           # type of view
    'sqft_above',     # square feet above ground
    'sqft_basement',  # square feet in basement
    'yr_built',       # the year built
    'yr_renovated',   # the year renovated
    'lat',            # latitude of the parcel
    'long',           # longitude of the parcel
    'sqft_living15',  # average sq.ft. of 15 nearest neighbors
    'sqft_lot15',     # average lot size of 15 nearest neighbors
]
In [56]:
# Echo the extended feature list handed to the larger model.
# Parenthesized single-argument print is valid on both Python 2 and 3.
print(advanced_features)
In [58]:
# Fit a linear regression on the training rows that predicts 'price' from
# every column named in `advanced_features`.
my_advance_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=advanced_features
)
In [59]:
# Evaluate all three models on the same held-out rows, ordered from the
# smallest feature set to the largest, so their metrics can be compared.
# Parenthesized single-argument print is valid on both Python 2 and 3.
print(sqft_model.evaluate(test_data))
print(my_features_model.evaluate(test_data))
print(my_advance_model.evaluate(test_data))
In [ ]: